{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 问题： 我们需要将字符串拆分为不同的字段，但是分隔符（以及分隔符之间的空格）\n",
    "# 在整个字符串中并不一致。\n",
    "\n",
    "# 解决方案 使用re.split() 方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "line = 'asdf fjdk; afed, fjek,asdf,    foo'\n",
    "line_splited = re.split(r'[;,\\s]\\s*',line)\n",
    "print(line_splited)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Herbert\\AppData\\Local\\conda\\conda\\envs\\py35\\lib\\re.py:203: FutureWarning: split() requires a non-empty pattern match.\n  return _compile(pattern, flags).split(string, maxsplit)\n"
     ]
    }
   ],
   "source": [
    "# 讨论：当使用re.split()时，需要小心正则表达式模式中的捕获组(capture group)是否包含在括号中。\n",
    "# 如果使用了捕获组，那么匹配的文本也会包含在最终结果中。如下：\n",
    "fields = re.split(r'(;|,|\\s|)\\s*', line)\n",
    "print(fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "asdf fjdk;afed,fjek,asdf,foo\n"
     ]
    }
   ],
   "source": [
    "# 在特定的上下文中获得分隔符字符也可能是有用的。\n",
    "# 例如，也许稍后要用到分隔字符来改进字符串的输出：\n",
    "values = fields[::2]\n",
    "# print(values)\n",
    "# 1.append()  向列表尾部追加一个新元素，列表只占一个索引位，在原有列表上增加\n",
    "# 2.extend() 向列表尾部追加一个列表，将列表中的每个元素都追加进来，在原有列表上增加\n",
    "# 3.+  直接用+号看上去与用extend()一样的效果，但是实际上是生成了一个新的列表存这两个列表的和，只能用在两个列表相加上\n",
    "# 4.+= 效果与extend()一样，向原列表追加一个新元素，在原有列表上增加\n",
    "delimiters = fields[1::2] + ['']\n",
    "\n",
    "# Reform the line using the same delimiters\n",
    "line_reform = ''.join(v+d for v, d in zip(values, delimiters))\n",
    "print\n",
    "print(line_reform)\n",
    "# line = 'asdf fjdk; afed, fjek,asdf,    foo'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']\n"
     ]
    }
   ],
   "source": [
    "# 如果不想在结果中看到分隔字符，但仍然用括号来对正则表达式模式进行分组，\n",
    "# 请确保用的是非捕获组，以（？：）的形式指定。\n",
    "words = re.split(r'(?:,|;|\\s)\\s*',line)\n",
    "print(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
